mle_cir <- function(X_t, X_s, Delta) {
  
  # This function estimates the parameters of a CIR model through MLE
  # Data needs to be a vector X_t and a vector of lagged values X_s
  # Delta denotes the frequency (dt in an SDE), with the convention that Delta = 1/52
  # means Delta = 1 week
  
  # Functions ---------------------------------------------------------------
  
  # This functions computes the conditional density for the CIR model as a
  # function of the initial parameters
  dens_cir <- function(x_t, x_s, dt, alpha, kappa, sigma) {
    # I use the notations from Ait-Sahalia (1999, JF p.1371)
    # dX = kappa * (alpha - X)dt + sigma * sqrt(X)dW
    
    c <- (2 * kappa) / ((sigma^2) * (1 - exp(-kappa * dt)))
    u <- c * x_s * exp(-kappa * dt)
    v <- c * x_t
    q <- (2 * kappa * alpha / (sigma^2)) - 1
    
    # Return the log density to avoid overflows
    # For the same reason, I used the exponentially scaled Bessel function
    p <- log(c) - (u+v) + (q/2) * log(v/u) + 2*sqrt(u*v) + 
      log(besselI(2*sqrt(u*v),
                  nu = q,
                  expon.scaled = TRUE))
    return(p)
  }
  
  # This function computes the log-likelihood of the sample as a function of a vector
  # of parameters theta for the CIR model
  ll_cir <- function(theta, X_t, X_s) {
    stopifnot(length(X_t) == length(X_s))
    
    # theta = (alpha, kappa, sigma) is the vector of parameters
    a <- theta[[1]]
    k <- theta[[2]]
    s <- theta[[3]]
    
    vec_dens <- vapply(seq_along(X_t),
                       function (i) dens_cir(X_t[i], X_s[i], dt = Delta,
                                             alpha = a, kappa = k, sigma = s),
                       numeric(1))
    return(-mean(vec_dens))
  }
  
  # Compute the drift of the model at a value x, given parameters
  drift <- function(x, theta, dt = Delta) {
    kappa <- theta[[1]]
    alpha <- theta[[2]]
    return(kappa * (alpha - x) * dt)
  }
  
  
  
  # MLE estimation ----------------------------------------------------------
  
  # Compute initial values for the parameters. First, for the drift, estimating
  # dX = drift(X, parameters) through NLS
  Y <- X_t - X_s
  nls_drift <- summary(nls(Y ~ drift(X_s, theta),
                           start = list(theta = c(1,0))))
  kappa0 <- nls_drift$coefficients[2,1]
  alpha0 <- nls_drift$coefficients[1,1]
  
  # Then, for the diffusion part, by running a regression of the squared residuals on X
  ols_diffusion <- summary(lm(Y ~ 0 + X,
                              data = list(Y = nls_drift$residuals^2, X = X_s)))
  sigma0 <- sqrt(ols_diffusion$coefficients[[1]] / Delta)
  
  # Collect the initial values
  theta0 <- c(alpha = alpha0,
              kappa = kappa0,
              sigma = sigma0)
  
  # Compute MLE
  
  mle_cir <- optim(theta0,
                   function (x) ll_cir(x, X_t, X_s),
                   hessian = TRUE)
  

  # Diagnostic plots to check (local) optimality ----------------------------

  alpha <- mle_cir[["par"]][["alpha"]]
  kappa <- mle_cir[["par"]][["kappa"]]
  sigma <- mle_cir[["par"]][["sigma"]]
  
  # Plot likelihood function on a discretized version of the interval
  # [parameter * low, parameter * high]
  low <- .8
  high <- 1.2
  
  grid_alpha <- seq(from = alpha * low, to = alpha * high, length.out = 100)
  ll_alpha <- vapply(grid_alpha,
                       function (x) ll_cir(c(alpha = x, kappa = kappa, sigma = sigma),
                                           X_t, X_s),
                       numeric(1))
  
  grid_kappa <- seq(from = kappa * low, to = kappa * high, length.out = 100)
  ll_kappa <- vapply(grid_kappa,
                     function (x) ll_cir(c(alpha = alpha, kappa = x, sigma = sigma),
                                         X_t, X_s),
                     numeric(1))
  
  grid_sigma <- seq(from = sigma * low, to = sigma * high, length.out = 100)
  ll_sigma <- vapply(grid_sigma,
                     function (x) ll_cir(c(alpha = alpha, kappa = kappa, sigma = x),
                                         X_t, X_s),
                     numeric(1))
  
  DT <- data.table(x_alpha = grid_alpha,
                   y_alpha = ll_alpha,
                   x_kappa = grid_kappa,
                   y_kappa = ll_kappa,
                   x_sigma = grid_sigma,
                   y_sigma = ll_sigma)
  
  plot_alpha <- ggplot(DT, aes(x = x_alpha, y = y_alpha)) +
    geom_line() +
    theme_bw() +
    xlab("α") +
    ylab("- Log-likelihood")
  
  plot_kappa <- ggplot(DT, aes(x = x_kappa, y = y_kappa)) +
    geom_line() +
    theme_bw() +
    xlab("κ") +
    ylab("- Log-likelihood")
  
  plot_sigma <- ggplot(DT, aes(x = x_sigma, y = y_sigma)) +
    geom_line() +
    theme_bw() +
    xlab("σ") +
    ylab("- Log-likelihood")
  

  # Final output ------------------------------------------------------------
  
  output_cir <- list(results = mle_cir$par,
                     cov = sqrt(diag(solve(mle_cir$hessian))),
                     log_likelihood = ll_cir(mle_cir$par, X_t, X_s),
                     plot = ggarrange(plot_alpha, plot_kappa, plot_sigma))
  
  return(output_cir)
  
}

# # Generate fake data for the test
X <- runif(100, min = .1, max = 2)
X_t <- X[2:100]
X_s <- X[1:99]
#
# mle_cir(X_t, X_s, 13/52)

